너도 기자냐 나도 기자다

Author

손채은

데이터

지도

데이터는 오마이뉴스 GitHub에서 가져옵니다.

library(tidyverse) #tidyverse 는 실행이 필요한 란에서 항시 실행 필요
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.2     ✔ readr     2.1.4
✔ forcats   1.0.0     ✔ stringr   1.5.0
✔ ggplot2   3.4.2     ✔ tibble    3.2.1
✔ lubridate 1.9.2     ✔ tidyr     1.3.0
✔ purrr     1.0.1     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(sf)
Linking to GEOS 3.11.2, GDAL 3.6.2, PROJ 9.2.0; sf_use_s2() is TRUE
# Read the precinct boundary file (GeoJSON) into an sf object.
map_sf <- sf::st_read("data/2020_21_elec_253_simple.json")
Reading layer `2020_21_elec_253_simple' from data source 
  `C:\Users\user\Documents\openai\data\2020_21_elec_253_simple.json' 
  using driver `GeoJSON'
Simple feature collection with 253 features and 4 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: 124.6098 ymin: 33.16123 xmax: 130.9175 ymax: 38.61369
Geodetic CRS:  WGS 84
# Default sf plot of the whole object.
plot(map_sf)

# Geometry only: strip the attribute columns and draw just the shapes.
plot(st_geometry(map_sf))

# Attributes only: drop the geometry column and show the remaining data.
as_tibble(st_drop_geometry(map_sf))
# A tibble: 253 × 4
   SGG_Code SGG_1 SGG_2                               SGG_3                  
      <int> <chr> <chr>                               <chr>                  
 1  2270202 대구  대구광역시 동구을                   대구 동구을            
 2  2270101 대구  대구광역시 중구남구                 대구 중구남구          
 3  2412001 경기  경기도 고양시갑                     경기 고양갑            
 4  2411002 경기  경기도 안양시동안구을               경기 안양동안을        
 5  2480802 경남  경상남도 김해시을                   경남 김해을            
 6  2410301 경기  경기도 수원시병                     경기 수원병            
 7  2280702 인천  인천광역시 계양구을                 인천 계양을            
 8  2110601 서울  서울특별시 동대문구갑               서울 동대문갑          
 9  2460702 전남  전라남도 순천시광양시곡성군구례군을 전남 순천광양곡성구례을
10  2440103 충남  충청남도 천안시병                   충남 천안병            
# ℹ 243 more rows

후보 데이터

library(rvest) # HTML scraping (the R counterpart of Python's BeautifulSoup)
library(stringr)
library(tidyverse)
library(janitor)
library(dplyr)

wiki_url <- "https://ko.wikipedia.org/wiki/%EB%8C%80%ED%95%9C%EB%AF%BC%EA%B5%AD_%EC%A0%9C21%EB%8C%80_%EA%B5%AD%ED%9A%8C%EC%9D%98%EC%9B%90_%EC%84%A0%EA%B1%B0_%EB%8D%94%EB%B6%88%EC%96%B4%EB%AF%BC%EC%A3%BC%EB%8B%B9_%ED%9B%84%EB%B3%B4_%EB%AA%A9%EB%A1%9D"

# Download and parse the page once so the headings and the tables are taken
# from the same document (and the network is hit a single time).
wiki_page <- read_html(wiki_url)

# Region names: the text of every <h3> heading, with the "[편집]" edit-link
# label removed.
sido_name <- wiki_page |>
  html_nodes("h3") |>
  html_text() |>
  str_remove("\\[편집\\]")

# One data frame per ".wikitable" element. html_nodes() returns every match,
# whereas html_node() would return only the first one.
minju_lst <- wiki_page |>
  html_nodes(".wikitable") |>
  html_table()

# Headings and tables are paired by position below, so stop early with a clear
# message if the page layout no longer yields one table per heading.
stopifnot(
  "number of <h3> headings differs from number of .wikitable tables" =
    length(sido_name) == length(minju_lst)
)

# Spot-check the first and the 17th table.
minju_lst[[1]]
minju_lst[[17]]

names(minju_lst) <- sido_name

# Turn the vector of region names into a data frame, attach each region's
# table as a list-column, and unnest so every row carries its region name.
nomination <- sido_name |>
  enframe(value = "시도명", name = "순번") |> # index column + region name
  mutate(data = minju_lst) |>                 # list-column of tables
  unnest(data) |>                             # expand tables next to 시도명
  select(-순번) |>
  janitor::clean_names(ascii = FALSE)

# Preview ten random rows without the 경력 column.
nomination |>
  slice_sample(n = 10) |>
  select(-경력) |>
  gt::gt()

# Persist both formats: the summary-table section reads the .rds file, and the
# .csv is kept as a plain-text copy. write_rds() returns its input invisibly,
# so the same data flows on to write_csv().
nomination |>
  write_rds("data/nomination.rds") |>
  write_csv("data/nomination.csv")

출판

요약표

library(tidyverse)
library(gt)
library(gtExtras)

# Load the scraped nomination data without re-running the scraper. The cached
# .rds file is preferred; if it is absent, fall back to the .csv that the
# scraping section writes.
nomination_rds <- "data/nomination.rds"
nomination <- if (file.exists(nomination_rds)) {
  read_rds(nomination_rds)
} else {
  read_csv("data/nomination.csv", show_col_types = FALSE)
}

# Number of precincts per nomination type, each type's share of the total,
# and a final "합계" (total) row.
# sort = TRUE already orders by descending count, so no separate
# arrange(desc(선거구수)) step is needed.
nomi_table <- nomination |>
  count(공천_유형, name = "선거구수", sort = TRUE) |>
  mutate(비율 = 선거구수 / sum(선거구수)) |>
  janitor::adorn_totals(name = "합계")

# Styled gt table: 538 theme, centred columns, 비율 shown as a percentage,
# and the "단수 공천" row highlighted (grey fill, red percentage).
nomi_gt <- nomi_table |>
  gt() |>
  gt_theme_538() |>
  cols_align(align = "center") |>
  fmt_percent(columns = 비율, decimals = 1) |>
  tab_options(
    heading.title.font.size = px(18L),
    column_labels.font.size = px(14L),
    table.font.size = px(11L)
  ) |>
  tab_header(
    title = md("더불어민주당 **공천** 유형"),
    subtitle = md("제21대 국회의원 선거")
  ) |>
  tab_style(
    style = cell_fill(color = "gray90"),
    locations = cells_body(
      rows = 공천_유형 == "단수 공천"
    )
  ) |>
  tab_style(
    style = cell_text(color = "red"),
    locations = cells_body(
      rows = 공천_유형 == "단수 공천",
      columns = 비율
    )
  )

nomi_gt

# Export the table as an image.
nomi_gt |> gt::gtsave(filename = "data/nomi_gt.png")

민주당 공천 유형 통계표

지도 시각화

library(tidyverse)
library(sf)

# Register fonts with the graphics devices; the map themes below request the
# "MaruBuri" family.
# NOTE(review): the family must already be installed/imported for this to
# take effect - confirm on the rendering machine.
extrafont::loadfonts()

# Per-precinct nomination table.
precinct_nomination_tbl <- readxl::read_excel(
  path = "data/precinct_nomination.xlsx"
)

# Precinct boundaries, explicitly tagged as EPSG:4326.
precinct <- st_set_crs(st_read("data/2020_21_elec_253_simple.json"), 4326)
Reading layer `2020_21_elec_253_simple' from data source 
  `C:\Users\user\Documents\openai\data\2020_21_elec_253_simple.json' 
  using driver `GeoJSON'
Simple feature collection with 253 features and 4 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: 124.6098 ymin: 33.16123 xmax: 130.9175 ymax: 38.61369
Geodetic CRS:  WGS 84
# Attach the nomination table to the precinct geometries by precinct code.
precinct_nomination_sf <-
  left_join(precinct |> select(SGG_Code, geometry),
            precinct_nomination_tbl,
            by = "SGG_Code")

st_geometry(precinct_nomination_sf) |> plot()

# Fill colour per nomination type, shared by both maps so they cannot drift
# apart.
nomination_palette <- c("경선 공천" = "gray90",
                        "단수 공천" = "red",
                        "전략 공천" = "pink",
                        "청년 경선 공천" = "black")

# Nationwide map coloured by nomination type.
nomination_type_gg <- ggplot() +
  geom_sf(data = precinct_nomination_sf,
          aes(geometry = geometry, fill = 공천_유형)) +
  theme_void(base_family = "MaruBuri") +
  theme(legend.position = "left") +
  scale_fill_manual(values = nomination_palette) +
  labs(title = "대한민국 제21대 국회의원 선거 더불어민주당 후보",
       caption = "자료출처: 위키백과")

# Precincts shown in the zoomed-in panel.
metro_sf <- precinct_nomination_sf |>
  filter(SGG_1 %in% c("서울", "경기"))

# Viewport for the zoomed-in panel, computed from exactly the features that
# are drawn so the panel always covers all of them.
metro_bb <- st_bbox(metro_sf)

# Quick look at the 경기 precinct outlines.
precinct_nomination_sf |>
  filter(SGG_1 == "경기") |>
  st_geometry() |>
  plot()

# Label only the "단수" precincts; every other precinct gets an empty label.
# if_else(missing = "") also blanks rows whose 공천_유형 is NA, which would
# otherwise be rendered as the literal text "NA".
metro_label_sf <- metro_sf |>
  mutate(SGG_3 = if_else(str_detect(공천_유형, "단수"), SGG_3, "",
                         missing = ""))

# Zoomed-in map with repelled text labels.
metro_nomination_type_gg <- ggplot() +
  geom_sf(data = metro_sf,
          aes(geometry = geometry, fill = 공천_유형)) +
  theme_void(base_family = "MaruBuri") +
  coord_sf(xlim = c(metro_bb[["xmin"]], metro_bb[["xmax"]]),
           ylim = c(metro_bb[["ymin"]], metro_bb[["ymax"]]),
           expand = FALSE) +
  theme(legend.position = "none") +
  scale_fill_manual(values = nomination_palette) +
  ggrepel::geom_text_repel(
    data = metro_label_sf,
    aes(label = SGG_3, geometry = geometry), stat = "sf_coordinates",
    min.segment.length = 1, size = 4, max.overlaps = Inf
  )


# Combine the nationwide map and the zoomed-in map (stacked vertically).
library(patchwork)

nomination_type_gg / metro_nomination_type_gg
Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
not found in Windows font database

Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
not found in Windows font database

Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
not found in Windows font database
Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
give correct results for longitude/latitude data
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
family not found in Windows font database

Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
family not found in Windows font database

Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
family not found in Windows font database

Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
family not found in Windows font database

Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
family not found in Windows font database

# Side-by-side composition: the zoomed-in panel gets twice the width of the
# nationwide panel.
panel_widths <- plot_layout(widths = c(1, 2))
nomination_type_final <-
  nomination_type_gg + metro_nomination_type_gg + panel_widths

nomination_type_final
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
family not found in Windows font database

Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
family not found in Windows font database
Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
give correct results for longitude/latitude data
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
family not found in Windows font database

Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
family not found in Windows font database

Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
family not found in Windows font database

Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
family not found in Windows font database

Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
family not found in Windows font database

Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
family not found in Windows font database

# Save the combined map. The plot is passed explicitly rather than relying on
# ggsave()'s default of "the last plot displayed", so the right figure is
# written even if another plot is shown in between.
ggplot2::ggsave("data/two_map.png", plot = nomination_type_final)
Saving 7 x 5 in image
Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
give correct results for longitude/latitude data
# Open a high-resolution JPEG device and draw the combined map onto it.
# print() is called explicitly: auto-printing only happens at top level, so a
# bare object name would draw nothing when this code is run via source() or
# inside a function.
ragg::agg_jpeg("data/two_map.jpg",
               width = 10, height = 7, units = "in", res = 600)
print(nomination_type_final)
Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
give correct results for longitude/latitude data
# Close the graphics device opened by ragg::agg_jpeg() above.
dev.off()
png 
  2 

# Self-contained TRUE

```yaml
format:
  html:
    self-contained: true
```